Date: 2022-06-23
R version: 3.5.0
*Corresponding author: matthew.malishev [at] gmail.com
This document can be found at https://github.com/darwinanddavis/UsefulCode

Overview

Same deal as Useful Code, but the second instalment because the first one has too much stuff in it and now runs slow.

Animations

Typing text animation based on typed.js

remotes::install_github("JohnCoene/typed")
require(typed)
typed("Hello")
typed("Emphasis word <span style ='color: red;'>with html</span>.", contentType = "html")
typed(list(shiny::h3("First sentence."), shiny::h4("Second sentence")), typeSpeed = 2)

Colour palettes

Colorspace

require(colorspace)
hcl_palettes(plot = TRUE)  # show all palettes

# https://cran.r-project.org/web/packages/colorspace/vignettes/colorspace.html
require(colorspace)
q4 <- qualitative_hcl(4, palette = "Dark 3")  # discrete
s9 <- sequential_hcl(9, "Purples 3")  # continuous
# for ggplot
scale_color_discrete_sequential(palette = "Purples 3", nmax = 6, order = 2:6)
# for colospace functions: hcl_palettes() %>% str hcl_palettes()['type']

Neon colour palettes

# https://www.shutterstock.com/blog/neon-color-palettes
neon1 <- c("#3B27BA", "#FF61BE", "#13CA91", "#FF9472")
neon2 <- c("#FFDEF3", "#FF61BE", "#3B55CE", "#35212A")
neon3 <- c("#FEA0FE", "#F85125", "#02B8A2", "#535EEB")
neon4 <- c("#535EEB", "#001437", "#C6BDEA", "#FFAA01")
scales::show_col(c(neon1, neon2, neon3, neon4))

Hexadecimal color code for transparency
See https://gist.github.com/lopspower/03fb1cc0ac9f32ef38f4.

require(colorspace)
require(stringr)
colv <- c("#004616", sequential_hcl(5, "Lajolla"))
str_sub(colv, 0, 1) <- "#66"  # add alpha opac to col vector

Lighten/darken colours

require(colorspace)
"#EFEFEF" %>% lighten(0.2)
"#EFEFEF" %>% darken(0.2)

Colour gradient palettes for multi coloured lines/paths/routes

require(colorspace)
require(ggplot2)
require(dplyr)

# data
nn <- 100
df <- data.frame(x = 1:nn, var1 = sample(200, nn, replace = T))

# option 1
colp <- "#f4d29f"
colv <- colorRampPalette(colors = c(colp %>% darken(0.2), colp, colp %>% lighten(0.2)))
colpal <- colv(df$var1 %>% unique %>% length)

# option 2
colpal <- sequential_hcl(df$var1 %>% unique %>% length, "Purple-Blue", power = 0, l = 50)

# plot
ggplot() + geom_line(data = df, aes(1:nrow(df), var1, color = var1), size = 1, show.legend = F, lineend = "round", 
    linejoin = "round") + scale_color_gradientn(colours = colpal, aesthetics = "col")

Chord diagrams

require(circlize,reshape2,tidyr)

# use either melted df or table
dtab <- df %>% # opt1
    dcast(var1~var2, fill = 0) %>% 
    melt() %>%
    uncount(value)
dtab <- df %>% table() # opt2

# plot pars
  par(mar = rep(2, 4),mfrow = c(1, 1))
  circos.clear()
  circos.par(start.degree = -180, # chord setup 
             gap.degree = 2, track.margin = c(-0.1, 0.1), 
             points.overflow.warning = F,
             track.height = 0.2)
  circos.par(cell.padding =c(0.02, 0, 0.02, 0))
  
  # plot 
  chordDiagram(dtab,
               grid.col = colpal,
               transparency = 0.3,
               directional = 1, # 1 = link origin is from sectors
               diffHeight  = -0.05,
               # link.border = "#FFFFFF",
               annotationTrack = c("grid"
                                   # ,"name" # to check name placment
               ),
               annotationTrackHeight = c(0.05, 0.1),
               big.gap = 5, small.gap = 2, # gaps between sectors
               link.sort = F, # order links are drawn
               link.decreasing = T, # define link overlap
               link.largest.ontop = T,
               preAllocateTracks = list(track.height = 0.1)
               # symmetric = F
               # scale = F # weight links equally
  )

Add custom labels

# after initiating plot (see above)
  ylim <- 0.85
  im <- 0.6
  circos.track(track.index = 1,  
               panel.fun = function(x,y){ # add text labels
                 sector_index = get.cell.meta.data("sector.numeric.index")
                 circos.text(x = CELL_META$xcenter, 
                             y = ylim, 
                             # remove medal labels
                             labels = CELL_META$sector.index,
                             facing = "clockwise", niceFacing = T, cex = im, col = col_lab
                 )}
               , bg.border = NA) # set bg to NA

Use custom raster images as sector labels

require(magick)

# create img labels
img <- "img.png"
img_convert <- function(img){
  imgr <- img %>% magick::image_read() %>% as.raster() # convert img to raster layer
  imgr[imgr == "#002163ff"] <- col_lab # change main img color
  imgr %>% return()
}

imgl <- c(as.list(rep(img,dtab$var1 %>% unique %>% length))) # match imgs to no of chord sectors
imglist <- lapply(imgl,img_convert) # apply convert to raster func
imgtab <- c(as.list(rep(NA,n)),imglist) # optional: rm first n images from sectors   
names(imgtab) <- get.all.sector.index() # get names from plot sector indices 

ylim <- 3
im <- "7mm"
circos.track(track.index = 1, 
             for(si in seq_along(get.all.sector.index())){ # apply event img to each sector index
               circos.raster(x = CELL_META$xcenter, #0.5,
                             y = ylim,
                             sector.index = get.all.sector.index()[si],
                             image = imgtab[[si]], # add image
                             width = im, height = im,
                             facing = "downward")
             },
             panel.fun = function(x,y){ # add text labels
               circos.text(x = CELL_META$xcenter, # center label
                           y = ylim/3,
                           labels =  CELL_META$sector.index,
                           facing = "clockwise", niceFacing = T,
                           cex = 0.5, col = col_lab,
                           adj = c(0, 0.5) # nudge xy position
               )}
             , bg.border = NA) # set bg to NA

D3

Links
-
-
-
-
-

D3 and leaflet

# devtools::install_github('jcheng5/d3scatter')
require(pacman)
p_load(d3scatter, crosstalk, leaflet, tibble, httpuv)

# converting df to crosstalk df
sd <- SharedData$new(df)
sd$data()[, "var1"]  # access data.frame
  [1] 174 153 196 177  29  38  33 109   8  33 140  53  23 153 187 172 152  96  26  30  10  68 178
 [24] 119  88  20  68  22  57 148 142  80 112  78  70  46  33  88 103  71  47  44  72   7 183  49
 [47] 194 195  56 184 179 174 177 123  78 164 113 187 186 164 148 187 179  38 139 196  58  99 182
 [70]  83 187  91 110  98 132  64  67  32 167 167  66  99  15 142 190 167 175  70   1  74  54 200
 [93] 145 166 150  16  88 140 165  77
# load data
sd <- SharedData$new(quakes[sample(nrow(quakes), 100), ])
# sd$data() %>% head

bscols(widths = c(12, 6, 6), filter_slider("stations", "Stations", sd, ~stations), leaflet(sd, width = "100%", 
    height = 400) %>% addTiles() %>% addCircleMarkers(lng = sd$data()[, "long"], lat = sd$data()[, "lat"], 
    stroke = F, fill = T, color = "red", fillOpacity = 0.5, radius = ~mag + 2, label = ~paste0("Depth: ", 
        as.character(depth))), d3scatter(sd, width = "100%", height = 400, ~mag, ~depth, color = ~stations))

Add dropdown menu to crosstalk

bscols(widths = c(12, 6, 6), filter_select(id = "stations", label = "Stations", sharedData = sd, group = ~stations))

Convert R code to D3 https://rstudio.github.io/r2d3/articles/visualization_options.html

Create calendar plot

# https://rstudio.github.io/r2d3/articles/gallery/calendar/ install.packages('r2d3')
require(r2d3)
require(readr)
require(dplyr)
require(colorspace)
require(scales)
require(stringr)

# col pal
col <- "PuBuGn"  # seq
col2 <- "Tropic"  # diverge

# seq
pal <- sequential_hcl(12, col)
# pal %>% show_col(borders = NA,labels=F)
paste0("\"", pal, "\"") %>% cat(sep = ",")
"#004533","#005C4E","#00726F","#008795","#0095B5","#56A0C8","#8FACD6","#B6BAE0","#D2CAE7","#E6DAEE","#F5EAF5","#FFF7FD"
# diverge
pal <- diverge_hcl(12, col2)
# pal %>% show_col(borders = NA,labels=F) paste0(''',pal,''') %>% cat(sep=',')

cal <- read_csv("https://raw.githubusercontent.com/rstudio/r2d3/master/vignettes/gallery/calendar/dji-latest.csv")

r2d3(data = cal, d3_version = 4, container = "div", options = list(start = 2006, end = 2011), script = "calendar.js")

Raindrop D3 animate chart

# library(d3rain)

df %>% d3rain(var_category, var_numeric, toolTip = var_colour) %>% drip_settings(dripSequence = "iterate", 
    ease = "bounce", jitterWidth = 20, dripSpeed = 1000, dripFill = colpal) %>% chart_settings(fontFamily = font, 
    yAxisTickLocation = "left")

rCharts
* Bubble
* Scatter
* + more

pacman::p_load(rCharts)
h4 = hPlot(Pulse ~ Height, data = MASS::survey, type = "bubble", group = "Sex", size = "Age", radius = 6, 
    group.na = "Not Available")
h4$chart(zoomType = "xy")
h4$exporting(enabled = F)
# h4$print(include_assets=T) # print d3js output

h4

Data frames

Reversing order of rows in dataframe/entire df

# df = data.frame
require(tidyverse)
df %>% map_df(rev)

Visualise data structure as tree

# explore package
require(DataExplorer)
require(palmerpenguins)
p <- penguins
plot_str(p)

Convert df row values to columns and lengthen df (ideal for tables/matrix inputs)

# convert each distinct value in var2 into new column while maintaining var1
require(reshape2)
require(tidyr)
d1 <- df %>% dcast(var1 ~ var2, fill = 0)  # fill NAs with 0

d1 %>% melt() %>% tidyr::uncount(value)  # lengthen df by each row value

Create custom df from existing data

# no need to rename  
latlon_data <- with(world.cities, data.frame( # //maps
  "city" = name,"country" = country.etc,"lat" = lat,"lon" = long,"population" = pop)
)

Convert df rows to cols and rename

df %>% as_tibble(rownames = "row_names") %>%  # convert cols to rows 
  rename("year" = 1,"percent" = 2) %>% na.omit()

Select df cols based on string

df %>% select(contains("Lat"))
df %>% select(starts_with("Lat"))

dplyr basics

require(dplyr,gapminder)
pacman::p_load(gapminder)

# mutate
africa_ranked <- mutate(gapminder, 
       "African" = continent == "Africa",
       "RankPop" = rank(desc(pop))
       ) 

# summarise data into one line  
gapminder %>% 
  summarise("MinYear" = min(year,na.rm = T),
            "MaxYear" = max(year),
            "CountryCount" = n_distinct(country),
            "Counts" = n()
            )

gapminder %>%  
  summarise(median(lifeExp))

# group by 
gapminder %>%  
  group_by(continent) %>% 
  summarise(median(lifeExp))  

# group by continent and filter by year
gapminder %>% 
  group_by("Continent" = continent) %>% 
  filter(year == 1992) %>% 
  summarise(LifeExpect = median(lifeExp)) -> life_cont_1992

# rename specific cols
df %>% 
  rename(.cols = 2:5, # only rename these cols
         "Country" = 2,
         "Gold" = 3,
         "Silver" = 4,
         "Bronze" = 5)

# replace case when values based on numeric range
df %>% 
  mutate(var1 = case_when(
      between(var2, 1, 5) ~ "A",
      between(var2, 6, 10) ~ "B",
      T ~ var1)
      )
# classic case when
df %>% 
  mutate_at("var1",funs(
    case_when(var1 == 1 ~ "alt1",
              var1 == 2 ~ "alt2",
              T ~ "alt3")))

# get distinct count per grouped var
df %>% group_by(var1) %>% 
  summarise(n = n_distinct(var2))

Execute unfriendly pipe functions inline in pipes

require(palmerpenguins)
require(dplyr)
p <- penguins

# %T>%
p %T>% glimpse %>% select(island)

# with()
p %>% with(lm(body_mass_g ~ flipper_length_mm)) %>% summary()

# %$% when var on lhs is undefined
require(magrittr)
data.frame(z = rnorm(100)) %$% ts.plot(z)

Apply function easily using mutate_at

# eg 1
df %>% mutate_at("var1", ~str_replace_all(., " ", "<br>"))

# eg 2
df %>% mutate_at("layer", ~replace(., is.nan(.), 0))

Expand/fill df by number of repeat instances

require(tidyr)
df %>% select(v1,v2,v3) %>% 
  melt() %>%
  tidyr::uncount(value) %>% # expand df by no. of instances (value from melted table)
  select(variable,v1) # reorder for colpal

Arrange df col by custom order

var_levels <- c("A", "C", "B")  # custom order

# opt 1
df %>% filter(var1 %in% var_levels) %>% arrange(var1 = factor(var1, levels = var_levels))

# with mutate
df %>% mutate(var1 = factor(var1, levels = var_levels)) %>% arrange(var1)

Remove unwanted df row using string arg

df %>% slice(-str_which(var1, "Unwanted point"))

Separate char values within df row into separate columns

df %>% 
  tidyr::separate(col = "ColumnA",
                  into = c("lon", "lat", "elev"), sep = " ", remove = T) %>% # separate char into individual columns 
  mutate_all(as.numeric) 

Get distinct values across multiple columns

# distinct
df %>% distinct(v1, v2, v3, .keep_all = T)

# non-distinct only
df %>% group_by(v1, v2, v3) %>% filter(n() > 1)

# exclude any non-distinct
df %>% group_by(v1, v2, v3) %>% filter(n() == 1)

# base method
df[!duplicated(df[1:3]), ]
df[!duplicated(df[c("var1", "var2"), ]), ]

Split one col into two separate cols and mutate (mutate and mutate_at in one line)

df %>% mutate(Year = var1 %>% str_split_fixed("-", Inf) %>% .[, 1], Month = var1 %>% str_split_fixed("-", 
    Inf) %>% .[, 2]) %>% mutate_at(c("Year", "Month"), as.numeric)

Apply multiple mutate_at functions

# use list for multiple functions of either one or multiple cols
df %>% mutate_at(5:7, list(
  ~str_remove_all(., "string1|string2|"), # func1
  ~str_split_fixed(., "/", n = 2) # func2
))

# Apply multiple `mutate` functions using select or contains
df %>% select(contains("string1")) %>% # select var 
  mutate_all(~str_remove_all(., "string1|string2|") %>% as.numeric) # apply funcs 

Mutate multiple vars

require(lubridate)
df %>% 
  mutate(AM = timevar %>% am,
         Hour = timevar %>% hour,
         Day = timevar %>% day,
         Month = timevar %>% month,
         Meridian = timevar %>% 
           round_date(unit = "hour") %>% # round off hour
           format("%I") %>% as.numeric) # get 12 hour time

Seperate/detect df into categories based on row name

df %>% filter(Restaurants %>% str_detect("Market|Water"))

Generic functions

Convert character class to numeric (ideal when creating colour palettes to turn string cols in df to numeric)

require(dplyr)
set.seed(12)
df <- data.frame(X = LETTERS[sample(20)])
int_vec <- df$X %>% unlist %>% as.factor %>% as.integer  # converts to numbers 
int_vec
df$I <- int_vec
df

Pipe vector to multiple arguments

require(dplyr)

# as list
Sys.time() %>% list(format(., c("%y-%m", "%Y-%m", "%Y-%m")))

# use curly braces to keep original class
Sys.time() %>% {
    format(., c("%y-%m", "%Y-%m", "%Y-%m"))
}

Merge/combine/match/fill rows of two data frames based on value and retain original number of rows

merge(a, b, by = "ID", sort = F)

Access vars in df/tibble that failed to load eg. time series that return NA

# as tibble
df %>% attr("problems")

Search available methods for package

showMethods("coerce", classes = "sf")
methods(st_as_sf)
`?`(methods)

Get object size

df %>% object.size()

Assign multiple values to multiple LHS objects

require(zeallot)

values <- c(1, 2, 3, 4)
c(a, b) %<-% values[c(2, 4)]  # assign `a` and `b`
c(a, b) %<-% c(1, "A")  # returns both as char 

Repeat vector n times (each)

rep(c("A", "B", "C"), 10)
rep(c("A", "B", "C"), each = 10)  # repeat each instance 

Google drive

Access files on Google Drive
Common commands: find, ls, mv, cp, mkdir, rm
http://googledrive.tidyverse.org/

require(googledrive)
drive_find(n_max = 10)  # set output limits
drive_find(type = "folder")
drive_get("~/Data/eli/feb.csv")

HTML/XML

Write html code to dir

code <- "<!DOCTYPE html>
  <html>
<body>

<h1>My First Heading</h1>

<p>My first paragraph.</p>

</body>
</html>"

code <- paste(as.character(code), collapse = "\n")

write.table(code, file = "/Users/code.html", quote = FALSE, col.names = FALSE, row.names = FALSE)

Extracting multiple nodes/range of nodes at once

# require(dplyr,rvest,xml2,readr,magrittr)
url <- "https://www.postholer.com/databook/Appalachian-Trail/3"
url %>% read_html() %>% html_nodes("table") %>% .[1:3]  # get range (node)
url %>% read_html() %>% html_nodes("table") %>% .[[1]]  # get individual (nodeset)

Extract values within nested nodes

require(purrr)
require(dplyr)
require(XML)
doc <- doc  # gpx, xml, or XMLInternalDocument class
getNodeSet(doc, path = "//parentnode") %>% purrr::map(xpathSApply, path = "child1/child2/child3", xmlValue)  # extract values within child 3 node (three nodes deep) and separate into individual lists

# option 2
getNodeSet(gpx2, path = "//folder") %>% lapply(function(x) {
    list(NODE1 = x %>% xpathSApply(path = "placemark/track/coord", xmlValue))
}) %>% bind_rows()

Extract css class/id using xpath

require(rvest)
nid <- "\"class_large\""  # use css class (note quote escape)

url %>% read_html %>% html_nodes(xpath = paste0("//*[@id=", nid, "]"))

Images

Recolor png/svg

require(magick)
png %>% image_colorize(color = "#EFEFEF", opacity = 70)

Interactive plots

Clickme, NVD3, Polychart, rCharts, Rickshaw, and xCharts in R.
Link to collated Github page. ### Javascript in R

Crosstalk, plotly, setting up widgets, customing JS in R, mapping with JS, d3, etc
https://book.javascript-for-r.com/widgets-intro-intro.html#widgets-intro-crosstalk

Leaflet

Interactive label options and custom tiles

require(leaflet)
require(dplyr)
require(geosphere)
require(htmltools)

setview <- c(7.369722, 12.354722)
mp <- data.frame(name = c("Melbourne", "Atlanta"), lat = c(-37.813629, 33.748997), lon = c(144.963058, 
    -84.387985))
latlon_matrix <- matrix(c(mp[, "lon"], mp[, "lat"]), ncol = 2)
custom_tile <- "http://a.sm.mapstack.stamen.com/(positron,(mapbox-water,$776699[hsl-color]),(buildings,$002bff[hsl-color]),(parks,$6abb9d[hsl-color]))/{z}/{x}/{y}.png"
colv <- "#4C3661"
opac <- 0.5
site_names <- mp$name
ttl <- "Debunking Flat Earth theory 101"
weblink <- "https://github.com/darwinanddavis"  # weblink
webname <- "My github"
href <- paste0("<b><a href=", weblink, ">", webname, "</a></b>")
text_label <- paste(sep = "<br/>", href, "606 5th Ave. S", "Seattle, WA 98138")
# label options
marker_label_opt <- labelOptions(textsize = "20px", opacity = 0.5, offset = c(0, 0))
text_label_opt <- labelOptions(noHide = T, direction = "top", textOnly = T, opacity = 1, offset = c(0, 
    0))

# title
tag.map.title <- tags$style(HTML(".leaflet-control.map-title { 
       transform: translate(-50%,20%);
       position: fixed !important;
       left: 50%;
       text-align: center;
       padding-left: 10px; 
       padding-right: 10px; 
       background: white; opacity: 0.7;
       font-weight: bold;
       font-size: 25px;
       }"))
title <- tags$div(tag.map.title, HTML(ttl))


# map
map <- gcIntermediate(latlon_matrix[1, ], latlon_matrix[2, ], n = 100, addStartEnd = T, sp = T) %>% leaflet() %>% 
    setView(setview[2], setview[1], zoom = 3) %>% addTiles(custom_tile) %>% addCircleMarkers(mp[, "lon"], 
    mp[, "lat"], radius = 10, stroke = TRUE, weight = 3, opacity = opac, color = colv, fillColor = colv, 
    label = paste(site_names), labelOptions = marker_label_opt) %>% addPolylines(color = colv, opacity = opac) %>% 
    addPopups(-122.327298, 47.597131, text_label, options = popupOptions(closeButton = FALSE, textOnly = T)) %>% 
    addLabelOnlyMarkers(setview[2], setview[1], label = text_label, labelOptions = text_label_opt) %>% 
    addControl("@darwinanddavis", position = "topright") %>% addControl(title, position = "topleft", 
    className = "map-title")
map

Add multiple layers at once (uses loop)

# https://stackoverflow.com/questions/38701359/grouped-layer-control-in-leaflet-r

# function to plot a map with layer selection
map_layers <- function() {
    
    # number of groups
    k <- n_distinct(quakes$groups)
    
    # base map
    map <- leaflet() %>% addProviderTiles(providers$CartoDB.Positron)
    
    # loop through all groups and add a layer one at a time
    for (i in 1:k) {
        map <- map %>% addCircleMarkers(data = quakes %>% filter(groups == i), group = as.character(i), 
            lng = ~long, lat = ~lat, radius = 1)
    }
    
    # create layer control
    map %>% addLayersControl(overlayGroups = c(1:k), options = layersControlOptions(collapsed = FALSE)) %>% 
        hideGroup(as.character(c(2:k)))  #hide all groups except the 1st one
    
}

# plot the map
map_layers()

Continuous colour legend

# set colpal
colpal <- sequential_hcl(6, "ag_GrnYl")
colv <- colpal[1]
scales::show_col(colpal)

# match data to colpal
pal <- colorNumeric(palette = colpal, domain = df$var1)

map <- leaflet() %>% setView(setview[1], setview[2], zoom = 2) %>% addTiles(custom_tile) %>% addPolygons() %>% 
    addLegend(pal = pal, values = df$var1, position = "bottomright", title = "Legend title", opacity = opac)

Lists

Transpose list (flip list elements)

l <- list(1:2, 3:4, 5:7, 8:10)
l
b <- data.table::transpose(l)
b

lengths for getting length of list indices

require(dplyr)
ls = list(rep(list(sample(50, replace = T)), 5))
ls %>% length
ls %>% lengths
lapply(ls, lengths)

Split list into smaller sublists

la = rep(list(1:5), 6)
names(la) = rep(LETTERS[1:3], 2)
u <- length(unique(names(la)))
n <- length(la)/u
split(la, rep(1:n, each = u))

# for when list has two elements in the name that change create a list of 10 letters with 5 lists in
# each
big_list <- rep(list(1:10), 5) %>% pmap(list)
names(big_list) <- LETTERS[1:10]
# to index the upper list
big_list["B"]  # 1
pluck(big_list, "B")  # 2
# to index the sublists
map(big_list["B"], 3)  # 1
bb_final <- list()  # 2
for (i in 1:10) {
    bb <- big_list["B"]
    bb_final <- c(bb_final, bb)
}
bb_final

Fill list elements with NAs to match length of longest element

# https://stackoverflow.com/questions/34570860/add-nas-to-make-all-list-elements-equal-length

# for single index list
set.seed(1)
ls = replicate(5, sample(1:100, 10), simplify = FALSE)
names(ls) = LETTERS[1:length(ls)]
lapply(ls, `length<-`, max(lengths(ls)))

# for sublists
ls = list(replicate(5, sample(1:100, 10), simplify = FALSE))
n.ticks = 20
fillvec = function(x) {
    nv = lapply(x, `length<-`, n.ticks)  # fill remaining vec with NAs to match total length
    rapply(nv, f = function(x) ifelse(is.na(x), 0, x), how = "replace")  # replace NAs with 0s
}
lapply(ls, fillvec)  # apply fillvec to list

Access list elements in loop by name/string

set.seed(12)

# inputs
time <- 5
time_vec <- 1:10
a_vec <- runif(10)
beta1_vec <- 1:10
beta2_vec <- 11:20
param_vec <- list(a_vec,beta1_vec,beta2_vec)
names(param_vec) <-c("alpha","beta1","beta2") 
params <- sapply(rep(NA,length(param_vec)),list) # create empty final params vector
names(params) <- names(param_vec)

# select parameter to test 
param_input <- "alpha" #beta1 #beta2

# run from here -----------------------------------------------------------
for(time in time_vec) {
  p_in = param_vec[`param_input`][[1]][time] # get parameter value by name
  # create new list of with updated param_input value
  params <- c(param_vec[-which(names(param_vec)==param_input)], # everything but param_input
              param_input = p_in # param_input
              )
  # get just the latest value
  # remove this if you want all list elements
  params <- sapply(params,function(x) x[1]) %>% as.numeric 
  # rename this new list
  names(params) <- c(names(param_vec)[-which(names(param_vec)==param_input)], # everything but param_input
                     param_input
                     )
  print(params)
} # end loop
params # each list element changes depending on user input 

Apply function to nested lists

ls = list(replicate(5, sample(1:100, 10), simplify = FALSE))
ls %>% glimpse
lapply(ls, lapply, mean)
lapply(ls, sapply, mean)  # return as one list 
rapply(ls, mean, how = "unlist")  # unlist, replace, or list

Apply function to list (without lapply)

require(lubridate)
ft <- c(now(), now() %>% rollback(), now() %>% rollback(roll_to_first = T))
ftl <- ft %>% list(isoyear(.), epiyear(.), wday(.), wday(., label = T), qday(.), week(.), semester(.), 
    am(.), pm(.))
names(ftl) <- c("data", "international standard date-time code (ISO 8601)", "epidemiological year", "weekday", 
    "weekday as label", "day into yearly quarter", "week of year", "semester", "AM?", "PM?")

Unlist and bind list elements together

ll %>% # list of sf objects   
  purrr::map(st_collection_extract,"LINESTRING") %>% # get just linestring 
  rlist::list.rbind() # unlist these and bind together 

Convert list to df

require(plyr)
ls %>% plyr::ldply(tibble)  # convert list to df

Conditional rules for lists

require(purrr)
cond <- ls %>% map(class) == "list"  # check if elements are class list
cond %>% unique()
ls %>% keep(cond)  # retain just these entries
ls %>% vec_depth  # check how far list indexes 

Rename list/matrices with pipe/dplyr

require(purrr)
require(magrittr)
# option 1
ls %>% map(matrix, ncol=2) %>%  # turn elements into matrices
  map(`colnames <-`, c("X","Y"))

# option 2
ls %>% map(matrix, ncol=2) %>%  # turn elements into
  map(magrittr::set_colnames, c("X","Y"))

Filter lists by values using pipe/map

require(purrr)
require(magrittr)

df <- ls %>% purrr::map(matrix, ncol = 2) %>% purrr::map(magrittr::set_colnames, c("X", "Y"))  # convert list to matrix

df %>% purrr::map(~.[, "Y"] > 150)  # return logical numeric 

df %>% purrr::map(~. > 150)  # return logical matrix

df %>% purrr::map(~.[.[, "X"] < 30], drop = T)  # return numeric 

Find/detect list elements

ls %>% detect(is.matrix)
ls %>% every(is.matrix)

Find common terms among vectors/lists

intersect(intersect(l1$var1, l2$var1), l3$var1) %>% unique
Reduce(intersect, list(l1$var1, l2$var1, l3$var1)) %>% unique

Microbenchmark

# benchmark

# devtools::install_github('olafmersmann/microbenchmarkCore')
# devtools::install_github('olafmersmann/microbenchmark')

require(microbenchmark)
mbm <- microbenchmark(list = list(b1 = {
    # first operation
}, b2 = {
    # second operation
}))
mbm  # print results
autoplot(mbm)  # plot results 

Plotting

Hand drawn plotting using roughViz.js. Link to package page.

# install.packages('remotes') remotes::install_github('XiangyunHuang/roughviz')
require(roughviz)

Read in data

Read in csv data sources directly from web

# link to raw csv link on e.g. github
require(readr)
url <- "https://raw.githubusercontent.com/plotly/datasets/master/2011_february_aa_flight_paths.csv"
flights <- read_csv(url)

Regex

resource_type <- "algae"
# this regex expression
list.files(pattern = paste0("^", resource_type, "_[0,5]{1}_[0-9]{1,2}_hostpop50_predpop", "[0-9]{1,3}_rep[1-5]{1}\\.R$"))
# returns this begins with resource_type, either 0 or 5 as one integer, 0 to 9 as either one or two
# integers, 0 to 9 as one to three integers, and 1 to 5 as one integer
"algae_0_5_hostpop50_predpop5_rep1.R"
"algae_5_20_hostpop50_predpop30_rep2.R"
"algae_0_15_hostpop50_predpop150_rep5.R"

GUI for regex (2021)
RegExplain
https://www.garrickadenbuie.com/project/regexplain/

devtools::install_github("gadenbuie/regexplain")
source("https://install-github.me/gadenbuie/regexplain")

plotly

HTML widget with plotly and crosstalk

require(pacman)
p_load(plotly,tidyr,crosstalk)

m <- gather(mpg, variable, value, -c(year,cyl)) # data source
msd <- highlight_key(m, ~variable) # var to highlight 
gg <- ggplot(m, aes(factor(year), value)) + # ggplot obj 
  geom_jitter(alpha = 0.3) +
  labs(x = "Year") +
  theme_classic()

# create layout 
bscols(
  widths = c(11, rep(5,2)), # max = 12
  filter_select("id", "Select a variable", msd, ~variable, multiple = F), # dropdown menu
  ggplotly(gg, dynamicTicks = "y") %>% 
    layout(margin = list(l = 30)),
  plot_ly(msd, x = ~jitter(cyl), y = ~value, alpha = ~cyl, linetype = NULL, 
          mode = "markers",
          hoverinfo = "text", 
          text = ~paste0("Cyl: ", round(cyl),
                        "\n",variable,": ", value,
                        "\nYear: ", year)
          ) %>% # interactive vars
    add_markers(alpha = 0.3) %>%
    layout(xaxis = list(showgrid = F, # general plot params 
                        title = "Cylinder"),
           yaxis = list(showgrid = F)
    )
)
# example 2 with changing output margins to fill browser 


require(htmltools)

# title
plotbg <- tags$html(
  HTML("<body style=\"background-color: black;\"></body>"))

resource_type <- "detritus"
memi_df <- readr::read_csv("https://raw.githubusercontent.com/darwinanddavis/mybio/master/data/memi_df.csv")
memi_df <- data.frame(memi_df)
memi_df %>% str
'data.frame':   3600 obs. of  7 variables:
 $ X1              : num  1 2 3 4 5 6 7 8 9 10 ...
 $ ME_EVENT        : chr  "skip30_0" "skip30_0" "skip30_0" "skip30_0" ...
 $ Cercs           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ ControlImpact   : num  0 0 0 0 0 0 0 0 0 0 ...
 $ ControlDay      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ ControlDay_names: chr  "Skip~30" "Skip~30" "Skip~30" "Skip~30" ...
 $ Time            : num  1 2 3 4 5 6 7 8 9 10 ...
# heatmap -----------------------------------------------------------------
require(viridis)
require(ggthemes)
require(plotly)

me_day_vec <- c("skip30","skip60","skip90","skip120")
me_day_names <- as.factor(c("Skip~30", "Skip~60", "Skip~90", "Skip120")) # char vec for labelling facets
ttl <- ""
subttl = ""
xlab <- "Time (days)"
ylab <- "Control intensity"
# turn names into function for labeller for facets
me_im_names <- c("No control","50%", "75%", "90%", "95%", "99%")
dens <- memi_df[,"Cercs"]
yy <- memi_df[,"ControlImpact"]
xx <- memi_df[,"Time"]
facet1 <- memi_df[,"ControlDay"]
p <- ggplot(memi_df,aes(x=xx,y=yy,fill=dens)) +
  geom_tile(colour= "gray",size=0.01, width=2, linetype = 0) +
  scale_fill_viridis(name="Density",option ="magma")
p <- p + facet_wrap(~ ControlDay_names,nrow=length(me_day_names), ncol=1, drop= F, labeller=label_parsed) # use for adding facet labels
# p <-p + facet_wrap(facet1, nrow=3, ncol=1, drop= F)
p <- p + scale_y_continuous(breaks = unique(yy), labels = me_im_names, trans = "reverse")
p <- p + scale_x_continuous(breaks = seq(0,max(xx),30), expand =c(0,2)) 
p <- p + geom_segment(aes(x = 152, xend = 152, y= 4, yend= 4),
                      arrow=arrow(length=unit(0.2,"cm")))   
p <- p + theme_calc() + 
  theme(text = element_text(size=18)) +
  # labs(title= paste0("Density of ",ttl, " by ",subttl), y=ylab, x=xlab) +
  labs(title = paste0("\n","\n",ttl), y=ylab, x=xlab,size=3) +
  theme(plot.title = element_text(vjust=-7)) +
  theme(legend.position = "bottom",legend.direction = "horizontal") +
  theme(legend.text = element_text(size=12)) +
  theme(plot.background = element_rect(fill = "black")) +
  ggpubr::theme_transparent() 
  # plot_it_gg("black","white")

m <- list(
  t = 100,
  r = 1,
  b = 1,
  l = 1,
  padding = 4
)

p <- ggplotly(p)

require(htmlwidgets)
h <- p %>% 
  layout( 
         plot_bgcolor = 'black',
         paper_bgcolor = 'black',
         font = list(color = 'black'),
         autosize = T, margin=m) %>% 
  sizingPolicy(padding = 0, browser.fill = TRUE,plotbg)

Crosstalk example 2

# time series plotly
pacman::p_load(dplyr, lubridate, ggplot2, plotly, gridExtra, plyr, ggthemes)
# install.packages('crosstalk')
library(crosstalk)

# load mock data
df <- readr::read_csv("/Users/malishev/Documents/Data/time_series/call_activity/call_activity.csv")
xinter <- seq(min(df$Date), max(df$Date), length.out = length(df$Date))

# plot data
p <- ggplot() + geom_vline(mapping = NULL, xintercept = xinter, colour = "grey80", size = 0.03) + geom_point(data = df, 
    aes(Date, Hour, color = Person, size = Calls)) + scale_y_continuous(limits = c(1, 23)) + scale_x_datetime(date_breaks = "1 week", 
    date_minor_breaks = "1 day", date_labels = "%D") + theme(axis.text.x = element_text(angle = 45)) + 
    labs(title = "Calls per hour of day", x = "Date (M/D/Y)", y = "Hour of day") + theme(panel.border = element_blank(), 
    panel.grid.major = element_line(color = "gray"), panel.grid.minor = element_line(color = "light gray"), 
    axis.line = element_line(color = "gray"))
p <- p + theme_hc()
ggplotly(p)
# plotly crosstalk
calls_person <- highlight_key(df, ~Hour)
person_person <- highlight_key(df)

pp <- bscols(widths = 12, p1 <- plot_ly(df, x = ~Date, y = ~Hour, color = ~Person, size = ~Calls, type = "scatter", 
    hoverinfo = "text", text = ~paste0("Date: ", Date, "\nName: ", Person, "\nCalls: ", Calls)) %>% layout(title = "Calls per hour of day", 
    xaxis = list(tickangle = 45, showgrid = T), yaxis = list(range = c(0, 23), showgrid = T), margin = list(l = 0.5)), 
    filter_select("id", "Select hour of day", calls_person, ~Hour, multiple = F), p2 <- plot_ly(calls_person, 
        x = ~Person, color = ~Person, type = "histogram") %>% layout(title = "Calls per person", yaxis = list(showgrid = F)))

pp <- htmltools::tagList(list(p1, p2))

# register plotly user Sys.setenv('plotly_username'='malishev') Sys.setenv('plotly_api_key'='apikey')
# ff <- plotly::api_create(p1,username='malishev')

subplot(p1, p2, nrows = 2)
htmltools::knit_print.shiny.tag.list(pp)

Gather/melt dfs to make dfs plotly friendly

require(tidyr)
require(plotly)
sm <- as.data.frame(EuStockMarkets) %>% gather(index, price) %>% mutate(time = rep(EuStockMarkets %>% 
    time(), 4))

sm %>% plot_ly(x = sm$time, y = sm$price, color = sm$index)

Strings

Detect strings in data frame or vector based on partial pattern. Useful when you don't know the complete name of data frame col.

df_names <- df %>% pull(var1) %>% unique
df %>% filter(var1 == df_names[str_detect(df_names, "va")])  # use partial string to pull df col

# dplyr version
iris %>% select(starts_with("Sepal"), contain("Sepal"), matches("Sepal"))

Convert multiple strings per vectors into separate numeric vectors or df cols

str1 <- c("-123.233786 49.553438 41.41", "-123.233715 49.553847 42.5", "-123.233645 49.55426 43.8") 

str1 %>% 
  as.data.frame() %>% 
  tidyr::separate(col = ".",into = c("lon", "lat", "elev"), sep = " ", remove = T) %>% # separate char into individual values
  mutate_all(as.numeric) 
        lon      lat  elev
1 -123.2338 49.55344 41.41
2 -123.2337 49.55385 42.50
3 -123.2336 49.55426 43.80

Replace all spaces with breaks (ideal for adding ggrichtext labels)

df %>% mutate_at("city", ~str_replace_all(., " ", "<br>"))

Insert characters/string to larger string

require(stringi)
"In the beginning" %>% stri_sub_replace(1, 2, value = "TEXT")  # set first and second char order to replace 

"In the beginning" %>% stri_sub_replace(2, 1, value = "TEXT")  # set second and first char order to insert    

Extract text from between characters, esp special characters

require(qdapRegex)
# works for multiple instances
txt <- "${ extract only this text } and also ${ get this text }"
qdapRegex::ex_between(txt, "${", "}")

# define left and right boundaries to extract multiple instances
txt <- "${ extract only this text } and also $[ get this text ]"
qdapRegex::ex_between(txt, c("${", "$["), c("}", "]"))

Check for numeric elements in a string

pacman::p_load(varhandle)
df$Value %>% check.numeric()

Extract integers from alphanumeric string

require(stringr)
string1 <- "3,160.72 A$"

# extracting numbers
string1 %>% str_split_fixed(".*?[0-9]", 2)
string1 %>% str_split_fixed("[[:digit:]]+", 2)
string1 %>% str_split_fixed("", 3)
gsub("[0-9]+", "", string1)
string1 %>% str_extract_all("[[:digit:]]+", 3)
string1 %>% str_replace_all("[:digit:]", " ")

gsub("[a-z]", "", string1)
gsub(".*?([0-9]+).*", "\\1", string1)

string1 %>% str_remove_all("[[:alpha:]]")  # rm all letters

# split by ',' and convert to numeric
string1 %>% str_split_fixed("\\p{WHITE_SPACE}", Inf) %>% as.vector() %>% str_replace_all(",", "") %>% 
    as.numeric()

# rm letters and special chars
string1 %>% str_remove_all("[a-zA-Z[^[:alnum:]]]+[$]") %>% str_replace_all(",", "") %>% as.numeric()

SVG

Read in svg

require(XML)
imgr <- "img1.svg" %>% xmlParse()

Time

Get just HMS portion of POSIX class

require(dplyr)
Sys.time() %>% format(format = "%H:%M:%S")

Convert character to hms format, esp for erraneous timedate data

pacman::p_load(lubridate, hms)

df %>% pull(var1) %>% as.factor %>% lubridate::hms() %>% period_to_seconds() %>% hms::as_hms()

Timezone converter

# get local tz
require(dplyr)
require(lubridate)
require(stringr)
itz <- "2021-02-01T02:22:59.000Z"
lubridate::ymd_hms(itz) %>% with_tz(OlsonNames()[OlsonNames() %>% str_which("Melb")])

Set time to 12 hour with AM/PM

require(lubridate)
df$time %>% as_datetime() %>% format("%d/%m %I:%M %p")  # set time to 12 hour with AM/PM

Convert character string to just year/month

# adds ambiguous day to vector, which can be ignored or removed

require(anytime)
require(lubridate)
# '1982-04' '1982-05' '1982-06' '1982-07'
df$Time %>% anytime::anydate()  # opt 1
df$Time %>% lubridate::parse_date_time("ym")  # opt 2
as.POSIXct(df$Time, format = "%Y-%M")  # opt 3

Round off datetime

require(lubridate)
df %>% mutate(Meridian = timevar %>% 
           round_date(unit = "hour") %>% # round off hour
           format("%I") %>% as.numeric) # get 12 hour time

Tables

Summary tables

remotes::install_github("ddsjoberg/gtsummary")
tbl_summary(
    trial2,
    by = trt, # split table by group
    missing = "no" # don't list missing data separately
  ) %>%
  add_n() %>% # add column with total number of non-missing observations
  add_p() %>% # test for a difference between groups
  modify_header(label = "**Variable**") %>% # update the column header
  bold_labels() 

Webscraping

Download data directly from web

url <- "https://www.data.com/test.xlsx"

httr::GET(url, httr::write_disk("test.xlsx", overwrite = T))